rm(list=ls())
#preventing startup messages and call pacage, if it daoesnt exsist, then install and call
suppressPackageStartupMessages(require(xts)) || {install.packages("xts"); require(xts)}
## [1] TRUE
suppressPackageStartupMessages(require(tidyverse)) || {install.packages("tidyverse"); require(tidyverse)}
## [1] TRUE
suppressPackageStartupMessages(require(rvest)) || {install.packages("rvest"); require(rvest)}
## [1] TRUE
suppressPackageStartupMessages(require(anytime)) || {install.packages("anytime"); require(anytime)}
## [1] TRUE
suppressPackageStartupMessages(require(lubridate)) || {install.packages("lubridate"); require(lubridate)}
## [1] TRUE
suppressPackageStartupMessages(require(mosaic)) || {install.packages("mosaic"); require(mosaic)}
## [1] TRUE
suppressPackageStartupMessages(require(stringr)) || {install.packages("stringr"); require(stringr)}
## [1] TRUE
suppressPackageStartupMessages(require(ggvis)) || {install.packages("ggvis"); require(ggvis)}
## [1] TRUE
suppressPackageStartupMessages(require(plotly)) || {install.packages("plotly"); require(plotly)}
## [1] TRUE
suppressPackageStartupMessages(require(dygraphs)) || {install.packages("dygraphs"); require(dygraphs)}
## [1] TRUE
#require(data.table)
#library(quantmod)
#Aluminium<-readRDS("Aluminium_data")
#Copper<-readRDS("copper_data")
Norsk Hydro
dateformat <- col_date(format = "%Y%m%d")
Norsk_hydro <- read_csv("https://www.netfonds.no/quotes/paperhistory.php?paper=NHY.OSE&csv_format=csv", col_types = cols(quote_date = dateformat))
## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 1)
## Warning: 74 parsing failures.
## row # A tibble: 5 x 5 col row col expected actual file expected <int> <chr> <chr> <chr> <chr> actual 1 1269 value an integer 1297767~ 'https://www.netfonds.no/quotes/paperhi~ file 2 2711 value an integer 2761167~ 'https://www.netfonds.no/quotes/paperhi~ row 3 2806 value an integer 2207866~ 'https://www.netfonds.no/quotes/paperhi~ col 4 2807 value an integer 2309111~ 'https://www.netfonds.no/quotes/paperhi~ expected 5 2808 value an integer 3834113~ 'https://www.netfonds.no/quotes/paperhi~
## ... ................. ... .......................................................................... ........ .......................................................................... ...... .......................................................................... .... .......................................................................... ... .......................................................................... ... .......................................................................... ........ ..........................................................................
## See problems(...) for more details.
#checking the reason for "Warning: 74 parsing failures"
summary(Norsk_hydro)
## quote_date paper exch
## Min. :1997-09-17 Length:5303 Length:5303
## 1st Qu.:2003-02-05 Class :character Class :character
## Median :2008-05-19 Mode :character Mode :character
## Mean :2008-05-12
## 3rd Qu.:2013-08-22
## Max. :2018-11-30
##
## open high low close
## Min. :15.28 Min. :15.28 Min. : 2.66 Min. :15.28
## 1st Qu.:24.88 1st Qu.:25.07 1st Qu.:24.61 1st Qu.:24.86
## Median :31.14 Median :31.50 Median :30.65 Median :31.10
## Mean :35.66 Mean :36.07 Mean :35.20 Mean :35.63
## 3rd Qu.:43.79 3rd Qu.:44.28 3rd Qu.:43.26 3rd Qu.:43.76
## Max. :86.00 Max. :86.70 Max. :84.20 Max. :85.60
##
## volume value
## Min. : 30826 Min. :5.610e+05
## 1st Qu.: 5469251 1st Qu.:1.649e+08
## Median : 8421994 Median :2.855e+08
## Mean : 11719913 Mean :3.955e+08
## 3rd Qu.: 14347290 3rd Qu.:4.833e+08
## Max. :517864304 Max. :2.131e+09
## NA's :74
Norsk_hydro <- Norsk_hydro %>% select(c(1,7))
names(Norsk_hydro) <- c("date", "nhy")
glimpse(Norsk_hydro)
## Observations: 5,303
## Variables: 2
## $ date <date> 2018-11-30, 2018-11-29, 2018-11-28, 2018-11-27, 2018-11-...
## $ nhy <dbl> 40.50, 40.95, 41.40, 41.15, 41.79, 41.05, 41.86, 42.12, 4...
# simple ggvis line graph for overview
Norsk_hydro %>%
ggvis(~date, ~nhy) %>%
layer_lines()
print (Norsk_hydro)
## # A tibble: 5,303 x 2
## date nhy
## <date> <dbl>
## 1 2018-11-30 40.5
## 2 2018-11-29 41.0
## 3 2018-11-28 41.4
## 4 2018-11-27 41.2
## 5 2018-11-26 41.8
## 6 2018-11-23 41.0
## 7 2018-11-22 41.9
## 8 2018-11-21 42.1
## 9 2018-11-20 41.3
## 10 2018-11-19 42.0
## # ... with 5,293 more rows
USD<- read_csv("https://www.netfonds.no/quotes/paperhistory.php?paper=USDNOK.FXSX&csv_format=csv ",
col_types = cols(quote_date = dateformat))
#selectingclose values and date
USD <- USD %>% select(c(1,7))
names(USD) <- c("date","NOK_USD")
USD %>%
ggvis(~date,~NOK_USD) %>%
layer_lines()
# the data seems to be lacking from 2006- 2013- but i am keeping it for later analysis
#which is somthing you'll notice if you hover your computer mouse of the straight line
ggplotly(ggplot(USD, aes(date, NOK_USD)) + geom_line(color = "#E7B800"))
NHY <- xts(Norsk_hydro$nhy, order.by = Norsk_hydro$date)
#usd <- xts(USD$NOK_USD, order.by = USD$date, frequency = 365)
#interactive dygraph over nhy data, value during the 2008 crisis is marked
dygraph(NHY) %>%
dyOptions(labelsUTC = TRUE, fillGraph=TRUE, fillAlpha=0.1, drawGrid = FALSE, colors="#00AFBB") %>%
dyRangeSelector(keepMouseZoom = T) %>%
dyCrosshair(direction = "vertical") %>%
dyHighlight(highlightCircleSize = 5, highlightSeriesBackgroundAlpha = 0.2, hideOnMouseOut = FALSE) %>%
dyRoller(rollPeriod = 1) %>%
dyEvent("2008-11-20", "Financial crisis", labelLoc = "top")
# removing
rm(list = "dateformat")
Specifying the url for desired website to be scrapped the following are a link to westmetall market data
# URL to westmetall market data
url_Al <- "https://www.westmetall.com/en/markdaten.php?action=show_table&field=LME_Al_cash"
url_copper <- "https://www.westmetall.com/en/markdaten.php?action=show_table&field=LME_Cu_stock"
# Reading & checking the HTML code from the website
webpage_A <- read_html(url_Al)
webpage_C <- read_html(url_copper)
CSS selectors to scrap desired sections, and creating a tibble
Aluminium<- tibble(date = webpage_A %>% html_nodes("td:nth-child(1)") %>% html_text %>%
str_replace_all("Febuary","February") %>% anydate,
Cash_Settlement= webpage_A %>% html_nodes("td:nth-child(2)") %>% html_text %>%
str_replace_all("\\.",""),
LME_3_month = webpage_A %>% html_nodes("td:nth-child(3)") %>% html_text %>%
str_replace_all("\\.",""),
Stock = webpage_A %>% html_nodes("td:nth-child(4)") %>% html_text %>%
str_replace_all("\\.", "") %>% as.numeric())
Aluminium$Cash_Settlement<- as.numeric(str_replace_all(Aluminium$Cash_Settlement,",","."))
Aluminium$LME_3_month <-as.numeric(str_replace_all(Aluminium$LME_3_month,",","."))
glimpse(Aluminium)
## Observations: 4,185
## Variables: 4
## $ date <date> 2018-11-30, 2018-11-29, 2018-11-28, 2018-11-2...
## $ Cash_Settlement <dbl> 1935.0, 1933.0, 1912.5, 1925.0, 1920.0, 1922.5...
## $ LME_3_month <dbl> 1938.5, 1933.0, 1924.0, 1935.5, 1930.0, 1934.5...
## $ Stock <dbl> 1052450, 1055275, 1057875, 1061700, 1064975, 1...
#saving the commodidity data
saveRDS(Aluminium, file = "Aluminium_data")
Copper <- tibble( date = webpage_C %>% html_nodes("td:nth-child(1)") %>% html_text %>%
str_replace_all("Febuary","February") %>% anydate,
Cash_settlment=webpage_C %>% html_nodes("td:nth-child(2)") %>% html_text %>%
str_replace_all("\\.",""),
LME_3_month = webpage_C %>% html_nodes ("td:nth-child(3)") %>% html_text %>%
str_replace_all("\\.",""),
Stock = webpage_C %>% html_nodes("td:nth-child(4)") %>% html_text %>%
str_replace_all("\\.","") %>% as.numeric())
Copper$Cash_settlment <- as.numeric(str_replace_all(Copper$Cash_settlment,",","."))
Copper$LME_3_month <- as.numeric(str_replace_all(Copper$LME_3_month,",","."))
summary(Copper)
## date Cash_settlment LME_3_month Stock
## Min. :2002-01-02 Min. : 1421 Min. : 1445 Min. : 25525
## 1st Qu.:2006-03-24 1st Qu.: 4088 1st Qu.: 3968 1st Qu.:158300
## Median :2010-06-18 Median : 6305 Median : 6313 Median :276675
## Mean :2010-06-18 Mean : 5726 Mean : 5709 Mean :327280
## 3rd Qu.:2014-09-11 3rd Qu.: 7371 3rd Qu.: 7366 3rd Qu.:431600
## Max. :2018-11-30 Max. :10148 Max. :10124 Max. :980075
print(Copper)
## # A tibble: 4,273 x 4
## date Cash_settlment LME_3_month Stock
## <date> <dbl> <dbl> <dbl>
## 1 2018-11-30 6238 6202 136175
## 2 2018-11-29 6282 6241 136250
## 3 2018-11-28 6198 6157 137325
## 4 2018-11-27 6170. 6136. 137850
## 5 2018-11-26 6246 6219 137100
## 6 2018-11-23 6210 6188 139550
## 7 2018-11-22 6246 6224 143325
## 8 2018-11-21 6242 6218 143275
## 9 2018-11-20 6262 6240 145250
## 10 2018-11-19 6260 6245 151625
## # ... with 4,263 more rows
saveRDS(Copper, file = "copper_data")
rm(list = c("webpage_A","webpage_C", "url_Al","url_copper"))
Almun <- xts(Aluminium$Cash_Settlement, order.by = Aluminium$date, frequency = 365)
Copp <- xts(Copper$Cash_settlment, order.by = Copper$date, frequency = 365)
risk_factors <- cbind(Almun,Copp)
dygraph(risk_factors,ylab="Cash_Settlement",
main="Copper and Aluminium Cash_Settlements") %>%
dySeries("..1",label="Copper") %>%
dySeries("..2",label="Aluminium") %>%
dyOptions(colors = c("blue","brown")) %>%
dyRangeSelector(dateWindow = c("2002-02-01", "2018-12-01"))
# selecting data for further manipulation
Aluminium <- Aluminium %>% select(c(1:2))
Copper<-Copper %>% select(c(1:2))
names(Aluminium) <- c("date", "Aluminium")
names(Copper) <- c("date", "Copper")
rm(list=c("Almun","Copp", "risk_factors", "NHY"))
#joning the data and commodities
nhy_sector<-left_join(Norsk_hydro, Aluminium, by = c("date")) %>%
left_join(.,Copper,by = c("date"))
# some missing data due to lack registrations on this dates
#and becouse the NHY dat streches further back (several years)
summary(nhy_sector)
## date nhy Aluminium Copper
## Min. :1997-09-17 Min. :15.28 Min. :1254 Min. : 1421
## 1st Qu.:2003-02-05 1st Qu.:24.86 1st Qu.:1658 1st Qu.: 4091
## Median :2008-05-19 Median :31.10 Median :1895 Median : 6297
## Mean :2008-05-12 Mean :35.63 Mean :1952 Mean : 5727
## 3rd Qu.:2013-08-22 3rd Qu.:43.76 3rd Qu.:2202 3rd Qu.: 7370
## Max. :2018-11-30 Max. :85.60 Max. :3292 Max. :10148
## NA's :1201 NA's :1117
nhy_sector <- nhy_sector[complete.cases(nhy_sector), ]
# switiching from wide to long
nhy_long <- nhy_sector %>% gather(key = "stock", value = "value", -date)
nhy_long
## # A tibble: 12,303 x 3
## date stock value
## <date> <chr> <dbl>
## 1 2018-11-30 nhy 40.5
## 2 2018-11-29 nhy 41.0
## 3 2018-11-28 nhy 41.4
## 4 2018-11-27 nhy 41.2
## 5 2018-11-26 nhy 41.8
## 6 2018-11-23 nhy 41.0
## 7 2018-11-22 nhy 41.9
## 8 2018-11-21 nhy 42.1
## 9 2018-11-20 nhy 41.3
## 10 2018-11-19 nhy 42.0
## # ... with 12,293 more rows
#na here accurs becouse of the close data for NHY streches back to 1997
# and ome few missing value, du to lack of registration
summary(nhy_long)
## date stock value
## Min. :2002-01-02 Length:12303 Min. : 17.06
## 1st Qu.:2006-03-02 Class :character 1st Qu.: 46.34
## Median :2010-08-18 Mode :character Median : 1812.00
## Mean :2010-07-07 Mean : 2556.03
## 3rd Qu.:2014-10-15 3rd Qu.: 3986.00
## Max. :2018-11-30 Max. :10148.00
#arrange in ascending order
nhy_long <- nhy_long %>% arrange(date)
# calculate the returns of each year
names(nhy_sector)
## [1] "date" "nhy" "Aluminium" "Copper"
nhy_return <- nhy_sector %>% mutate(., nhy_returns=c(NA, diff(log(nhy))),
Aluminium_returns=c(NA, diff(log(Aluminium))),
Copper_returns=c(NA, diff(log(Copper))))
# remove the nas
nhy_return <- nhy_return[complete.cases(nhy_return), ]
#
nhy_return <- nhy_return %>% mutate(nhy_cumulative=cumsum(nhy_returns),
Aluminium_cumlative=cumsum(Aluminium_returns),
Copper_cumulative=cumsum(Copper_returns))
#selecting and adding the returns over time
nhy_long <- nhy_return %>%
select(date,nhy_cumulative,Aluminium_cumlative,Copper_cumulative) %>%
gather(key = "stock",value = "value", -date)
# graph the long dataframe
ggplot(nhy_long, aes(date, value))+
geom_line(aes(color = stock)) +
scale_color_manual(values = c("#00AFBB", "#E7B800","#CC0000"))
#changing to xts format
nhy_cumulative <- xts(nhy_return$nhy_cumulative, order.by = nhy_return$date)
Aluminium_cumulative <- xts(nhy_return$Aluminium_cumlative, order.by = nhy_return$date)
Copper_cumulative <- xts(nhy_return$Copper_cumulative, order.by = nhy_return$date)
#combining columns
cumulative_stocks <- cbind(nhy_cumulative,Aluminium_cumulative,Copper_cumulative)
#creating interactive graph
cum_dygraph <-dygraph(cumulative_stocks, ylab="cumulative",
main="Cumulative data") %>%
dySeries("..1",label="NHY") %>%
dySeries("..2",label="Aluminium") %>%
dySeries("..3",label ="Copper") %>%
dyOptions(colors = c("lightblue","brown", "orange")) %>%
dyEvent("2008-11-20", "Financial crisis", labelLoc = "bottom") %>%
dyShading( "2008-1-1", "2009-6-1","#FFE6E6") %>%
dyShading("2018-1-1", "2018-11-1") %>%
dyRangeSelector(dateWindow = c("2006-02-01", "2018-12-01"))
cum_dygraph
#removing dataframes, since the information is saved within the return dataframe
rm(list=c("cumlative_stocks","nhy_cumulative","Aluminium_cumulative","Copper_cumulative",
"Aluminium","Copper","nhy_sector","Norsk_hydro"))
## Warning in rm(list = c("cumlative_stocks", "nhy_cumulative",
## "Aluminium_cumulative", : object 'cumlative_stocks' not found
# confirming what the obvious , through linaer regression
nhy_return %>%
ggvis(~nhy, ~Aluminium) %>%
layer_points() %>%
layer_model_predictions(model = "lm", se = TRUE)
## Guessing formula = Aluminium ~ nhy